03 PyTorch CPU to GPU copy


In [1]:
% reset -f
from __future__ import print_function
from __future__ import division
import math
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
import torch
import sys
print('__Python VERSION:', sys.version)
print('__pyTorch VERSION:', torch.__version__)
print('__CUDA VERSION')
from subprocess import call
# call(["nvcc", "--version"]) does not work
! nvcc --version
print('__CUDNN VERSION:', torch.backends.cudnn.version())
print('__Number CUDA Devices:', torch.cuda.device_count())
print('__Devices')
call(["nvidia-smi", "--format=csv", "--query-gpu=index,name,driver_version,memory.total,memory.used,memory.free"])
print('Active CUDA Device: GPU', torch.cuda.current_device())

print ('Available devices ', torch.cuda.device_count())
print ('Current cuda device ', torch.cuda.current_device())


__Python VERSION: 2.7.12 (default, Nov 19 2016, 06:48:10) 
[GCC 5.4.0 20160609]
__pyTorch VERSION: 0.1.12+4eb448a
__CUDA VERSION
nvcc: NVIDIA (R) Cuda compiler driver
Copyright (c) 2005-2016 NVIDIA Corporation
Built on Tue_Jan_10_13:22:03_CST_2017
Cuda compilation tools, release 8.0, V8.0.61
__CUDNN VERSION: 5110
__Number CUDA Devices: 1
__Devices
Active CUDA Device: GPU 0
Available devices  1
Current cuda device  0

Alloocate a PyTorch Tensor on the GPU


In [3]:
x=torch.Tensor(3,4)
if torch.cuda.is_available():
    x = x.cuda()*2     
print (type(x))
print (x)


<class 'torch.cuda.FloatTensor'>

1.00000e-31 *
 -6.5516  0.0000  0.0000  0.0000
 -0.0008  0.0000 -0.0011  0.0000
 -0.0009  0.0000 -0.0011  0.0000
[torch.cuda.FloatTensor of size 3x4 (GPU 0)]


In [13]:
import numpy as np
import torch.cuda as cu
import contextlib
import time


# allocates a tensor on GPU 1
a = torch.cuda.FloatTensor(1)

# transfers a tensor from CPU to GPU 1
b = torch.FloatTensor(1).cuda()
    
# Timing helper with CUDA synchonization
@contextlib.contextmanager
def timing(name):
    cu.synchronize()
    start_time = time.time()
    yield
    cu.synchronize()
    end_time = time.time()
    print ('{} {:6.3f} seconds'.format(name, end_time-start_time))
    
    
for shape in [(128**3,), (128,128**2), (128,128,128), (32,32,32,64)]:
    print ('shape {}, {:.1f} MB'.format(shape, np.zeros(shape).nbytes/1024.**2))

    with timing('from_numpy sent to GPU     '): torch.from_numpy (np.zeros(shape)).cuda()
    with timing('CPU constructor            '): torch.FloatTensor(np.zeros(shape))
    with timing('CPU constructor sent to GPU'): torch.FloatTensor(np.zeros(shape)).cuda()
    with timing('GPU constructor            '): cu.   FloatTensor(np.zeros(shape))
    print


shape (2097152,), 16.0 MB
from_numpy sent to GPU       0.002 seconds
CPU constructor              0.054 seconds
CPU constructor sent to GPU  0.054 seconds
GPU constructor              0.056 seconds
shape (128, 16384), 16.0 MB
from_numpy sent to GPU       0.003 seconds
CPU constructor              0.053 seconds
CPU constructor sent to GPU  0.054 seconds
GPU constructor              0.144 seconds
shape (128, 128, 128), 16.0 MB
from_numpy sent to GPU       0.003 seconds
CPU constructor              0.055 seconds
CPU constructor sent to GPU  0.056 seconds
GPU constructor             12.634 seconds
shape (32, 32, 32, 64), 16.0 MB
from_numpy sent to GPU       0.003 seconds
CPU constructor              0.057 seconds
CPU constructor sent to GPU  0.057 seconds
GPU constructor             25.269 seconds

Conclusion

Should be using torch.from_numpy(x).cuda()

Refer to https://github.com/pytorch/pytorch/issues/1299


In [ ]: